# Session reset ----
# Clear the console ("\014" is the form-feed character).
cat("\014")
# Remove every object currently in the workspace.
rm(list = ls())

library(data.table)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.1

# Raw data ----
# Combine the two address columns into a single `Address` column.
dataset <- read.csv("severeinjury1.csv")
dataset$Address <- paste(dataset$Address1, dataset$Address2)
dataset$Address1 <- NULL
dataset$Address2 <- NULL
dataset <- as.data.frame(dataset)
# Move column 25 to position 5 and keep columns 1-24 in their existing order.
# NOTE(review): this assumes the merged `Address` column ends up as column 25
# (i.e. the raw file has 26 columns) - confirm against severeinjury1.csv.
dataset <- dataset[c(1:4, 25, 5:24)]

# Pre-processed data ----
# NOTE: this assignment replaces the data frame built from the raw file
# above; everything below works on processed_data.csv only.
dataset <- read.csv("processed_data.csv")
# Convert the first character of every string in `x` to upper case.
#
# x: a character vector.
# Returns a vector like `x` in which only the first character of each element
# has been upper-cased; all other characters are left as they are.
first_character_uppercase <- function(x) {
  capitalised <- x
  substr(capitalised, 1L, 1L) <- toupper(substr(x, 1L, 1L))
  capitalised
}
#dataset$Employer = paste(toupper(substr(dataset$Employer, 1, 1)),
#substr(dataset$Employer, 2, nchar(dataset$Employer)), sep="")
# Drop the `EventDate.1` column.
dataset$EventDate.1 <- NULL

# Normalise capitalisation of the text columns: lower-case the whole value,
# then upper-case its first character.
dataset$Employer <- first_character_uppercase(tolower(dataset$Employer))
dataset$City <- first_character_uppercase(tolower(dataset$City))
dataset$State <- first_character_uppercase(tolower(dataset$State))

# A constant 1 per row, so that sum(count) within a group is the number of
# rows in that group.
dataset$count <- rep.int(1, nrow(dataset))

# Number of records per state.
dt <- data.table(dataset)
injurycount_state <- dt[, list(count = sum(count)), by = State]
#rm(dt)

# Point plot of the per-state counts; state names are rotated 90 degrees on
# the x axis and the x-axis title is blanked by the theme.
state_count <- ggplot(injurycount_state, aes(x = State, y = count)) +
  geom_point() +
  ggtitle("State vs count of the injury") +
  xlab("State") +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 90, hjust = 1)
  )
# Give column 27 a working name for the sector lookup below.
# NOTE(review): the position is hard-coded; presumably column 27 holds the
# two-digit industry (NAICS sector) code - confirm against processed_data.csv.
colnames(dataset)[27] <- "Industry_codes"

# Translate each two-digit industry code into a sector name.
# A named lookup vector replaces the former 23-level nested ifelse() chain;
# the work happens inside local() so no helper objects are left behind in the
# workspace.
dataset$industrynames <- local({
  sector_names <- c(
    "11" = "Agricultural, Forestry, Fishing and Hunting",
    "21" = "Mining, Quarrying, Oil and Gas extraction",
    "22" = "Utilities",
    "23" = "Construction",
    "31" = "Manufacturing",
    "32" = "Manufacturing",
    "33" = "Manufacturing",
    "42" = "Wholesale Trade",
    "44" = "Retail Trade",
    "45" = "Retail Trade",
    "48" = "Transportation and Warehousing",
    "49" = "Transportation and Warehousing",
    "51" = "Information",
    "52" = "Finance and Insurance",
    "53" = "Real Estate and Rental and Leasing",
    "54" = "Professional, Scientific and Technical services",
    "55" = "Management of Companies and Enterprises",
    "56" = "Administrative and Support services",
    "61" = "Educational Service",
    "62" = "Health Care and Social Assistance",
    "71" = "Arts, Entertainment and Recreation",
    "72" = "Accommodation and Food services",
    "81" = "Other services"
  )

  # as.character() makes the lookup work by code text whether the column was
  # read as integer, character or factor (indexing by a factor would use its
  # internal integer codes instead).
  codes <- as.character(dataset$Industry_codes)
  labels <- unname(sector_names[codes])

  # Same fallback as the original chain: every code that is not listed above
  # is labelled as public administration. Missing codes stay NA.
  labels[is.na(labels) & !is.na(codes)] <- "Public Administration"
  labels
})
# Number of records per industry sector.
dt1 <- data.table(dataset)
injurycount_industry <- dt1[, list(count = sum(count)), by = industrynames]
#rm(dt)

# Point plot (blue triangles) of the per-industry counts; sector names are
# rotated 45 degrees and the x-axis title is blanked by the theme.
industry_count <- ggplot(
  injurycount_industry,
  aes(x = industrynames, y = count)
) +
  geom_point(color = "blue", size = 3, shape = 17) +
  ggtitle("Industry vs count of the injury") +
  xlab("Industry") +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# Rename column 27 to "Industrycode" (position is hard-coded).
names(dataset)[27] <- "Industrycode"
#plot = ggplot()+geom_point(dataset, aes(x=dataset$))
#rmarkdown::render("dataset_script.R", "html_document")
# multiplot(state_count, industry_count)
print(state_count)

print(industry_count)

# Records located in Texas, and the number of records per Texas city.
# which() keeps only the rows where the comparison is TRUE. Indexing with the
# bare logical vector would add an all-NA row for every record whose State is
# NA, because an NA row index selects an NA row.
Injuryprone_area <- dataset[which(dataset$State == "Texas"), ]
dt2 <- data.table(Injuryprone_area)
injurycount_city <- dt2[, sum(count), by = City]
colnames(injurycount_city)[2] <- "count"
#rm(dt)
library(memisc)
## Warning: package 'memisc' was built under R version 3.4.1
## Loading required package: lattice
## Loading required package: MASS
##
## Attaching package: 'memisc'
## The following objects are masked from 'package:stats':
##
## contr.sum, contr.treatment, contrasts
## The following object is masked from 'package:base':
##
## as.array
suppressMessages(library(memisc, warn.conflicts = FALSE, quietly=TRUE))
# Keep only the Texas cities with more than 30 recorded injuries.
Maxinjuries_city <- injurycount_city[count > 30]

# Bar chart of those cities; bar height is the pre-computed count, city names
# are rotated 45 degrees and the x-axis title is blanked by the theme.
city_count <- ggplot(Maxinjuries_city, aes(x = City, y = count)) +
  geom_bar(stat = "identity") +
  ggtitle("Areas where maximum injuries occurred in Texas") +
  xlab("City") +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
# colnames(dataset)[27] = "Industrycode"
print(city_count)

library(memisc)
suppressMessages(library(memisc, warn.conflicts = FALSE, quietly=TRUE))
library(googleVis)
## Warning: package 'googleVis' was built under R version 3.4.1
## Creating a generic function for 'toJSON' from package 'jsonlite' in package 'googleVis'
##
## Welcome to googleVis version 0.6.2
##
## Please read Google's Terms of Use
## before you start using the package:
## https://developers.google.com/terms/
##
## Note, the plot method of googleVis will by default use
## the standard browser to display its output.
##
## See the googleVis package vignettes for more details,
## or visit http://github.com/mages/googleVis.
##
## To suppress this message use:
## suppressPackageStartupMessages(library(googleVis))
# Load the records used for the maps and add a combined "latitude:longitude"
# text column: the two values are pasted with a space, then every space is
# replaced by a colon.
missinglatlong <- read.csv("missinglatlong.csv")
missinglatlong$Latitude_Longitude1 <- gsub(
  " ", ":",
  paste(missinglatlong$Latitude, missinglatlong$Longitude),
  fixed = TRUE
)
# googleVis provides gvisMap(). The previous line requested the misspelled
# package "googlelVis" through require(), which only returned FALSE with a
# "there is no package called" warning instead of failing; library() with the
# correct name stops the script immediately if the package is unavailable.
library(googleVis)

# Marker data for the map: the numeric `Hospitalized` value as `name` and the
# "latitude:longitude" string as `latLong`.
Hos <- as.numeric(missinglatlong$Hospitalized)
# NOTE: this data frame is called `plot`, like the plot() function. The call
# plot(sites) below still reaches the function, because R ignores
# non-function objects when it looks up the name in a call.
plot <- data.frame(
  name = Hos,
  latLong = unlist(missinglatlong$Latitude_Longitude1)
)
sites <- gvisMap(
  plot,
  locationvar = "latLong",
  options = list(
    displayMode = "Markers",
    mapType = 'normal',
    colorAxis = "{colors:['red','grey']}"
  )
)
plot(sites)
## starting httpd help server ...
## done
# NOTE(review): hard-coded URL that looks like it was copied from an earlier
# session's output - confirm whether this print is still wanted.
print("http://127.0.0.1:31731/custom/googleVis/MapID21ec234d793a.html")
## [1] "http://127.0.0.1:31731/custom/googleVis/MapID21ec234d793a.html"
library(memisc)
suppressMessages(library(memisc, warn.conflicts = FALSE, quietly=TRUE))
library(ggmap)
## Warning: package 'ggmap' was built under R version 3.4.1
# Base map centred on the United States, drawn under the three point layers
# below.
mapy <- get_map(location = "United States", source = "google", scale = 2, zoom = 4)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=United+States&zoom=4&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=United%20States&sensor=false

# Constant appearance settings belong outside aes(). Inside aes() a constant
# is treated as data and passed through a scale: 'red' became a one-level
# category drawn in the default palette colour (not red), 0.00001 was not the
# opacity actually drawn, and both produced meaningless legends.
# Using 0.00001 literally would make the points invisible, so a
# semi-transparent 0.5 is set instead - adjust to taste.

# All records, in red.
ggmap(mapy) +
  geom_point(
    data = missinglatlong,
    aes(x = Longitude, y = Latitude),
    alpha = 0.5,
    colour = "red"
  )
## Warning: Removed 54 rows containing missing values (geom_point).

# Records coloured by their `Hospitalized` value.
ggmap(mapy) +
  geom_point(
    data = missinglatlong,
    aes(x = Longitude, y = Latitude, colour = factor(Hospitalized)),
    alpha = 0.5
  )
## Warning: Removed 54 rows containing missing values (geom_point).

# Records coloured by their `Amputation` value.
ggmap(mapy) +
  geom_point(
    data = missinglatlong,
    aes(x = Longitude, y = Latitude, colour = factor(Amputation)),
    alpha = 0.5
  )
## Warning: Removed 54 rows containing missing values (geom_point).

# Closer view centred on Houston (zoom 7) with all records overlaid.
texas <- get_map(location = "Houston", source = "google", scale = 2, zoom = 7)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=Houston&zoom=7&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Houston&sensor=false
# The opacity is a constant, so it is set outside aes(). Inside aes() it was
# treated as data, passed through the alpha scale and given a legend.
# Using 0.00001 literally would make the points invisible, so a
# semi-transparent 0.5 is set instead - adjust to taste.
ggmap(texas) +
  geom_point(
    data = missinglatlong,
    aes(x = Longitude, y = Latitude),
    alpha = 0.5
  )
## Warning: Removed 19246 rows containing missing values (geom_point).
